This document describes how to go from raw vcftools output of diversity metrics (Fst, pi, and Tajima’s D) to Manhattan plots, including making figures for the manuscript associated with this repo. This script is a result of brute forcing things to work: I am certainly no expert, and thus lots of the notes refer to my own dumb mistakes.
library(tidyverse)
library(qqman)
library(scales)
First, read in raw output files from vcftools (in this repo, see filter-scan.sh for code to generate).
# Directory holding the raw vcftools output. Change this one line to run the
# analysis from another machine or checkout.
input.dir <- "/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input"

# Read one tab-delimited vcftools table from `input.dir` and return it as a
# tibble. `file` is the bare file name (no directory).
read_vcftools <- function(file) {
  as_tibble(read.csv(file.path(input.dir, file), sep = "\t"))
}

# Windowed Fst for each population pair.
fst.UKUS.50kb <- read_vcftools("EUSTreseq_pseud_nostep_UKUS_50kb.windowed.weir.fst")
fst.AUUK.50kb <- read_vcftools("EUSTreseq_pseud_nostep_AUUK_50kb.windowed.weir.fst")
# The object is called USAU but the file on disk is named AUUS.
fst.USAU.50kb <- read_vcftools("EUSTreseq_pseud_nostep_AUUS_50kb.windowed.weir.fst")

# Windowed nucleotide diversity (pi) for each population.
pi.UK.50kb <- read_vcftools("EUSTreseq_pseudochrom_UK_pi_50kb.windowed.pi")
pi.AU.50kb <- read_vcftools("EUSTreseq_pseudochrom_AU_pi_50kb.windowed.pi")
pi.US.50kb <- read_vcftools("EUSTreseq_pseudochrom_US_pi_50kb.windowed.pi")

# Windowed Tajima's D for each population.
TajD.UK.50kb <- read_vcftools("EUSTreseq_pseudochrom_UK_TajimaD_50kb.Tajima.D")
TajD.AU.50kb <- read_vcftools("EUSTreseq_pseudochrom_AU_TajimaD_50kb.Tajima.D")
TajD.US.50kb <- read_vcftools("EUSTreseq_pseudochrom_US_TajimaD_50kb.Tajima.D")
vcftools outputs a few identifiers for position: “CHROM,” “BIN_START” and “BIN_END” for .fst and .pi files, but only “CHROM” and “BIN_START” for .Tajima.D files. Unfortunately, the numbering is off: .Tajima.D windows start at 0, 50000, … while .fst and .pi windows start at 1, 50001, … (compare the two head() outputs below), so we’ll add 1 to every “BIN_START” in the .Tajima.D files.
head(TajD.AU.50kb)
## # A tibble: 6 x 4
## CHROM BIN_START N_SNPS TajimaD
## <fct> <int> <int> <dbl>
## 1 10 0 147 0.583
## 2 10 50000 334 0.372
## 3 10 100000 301 0.836
## 4 10 150000 98 1.56
## 5 10 200000 102 -0.557
## 6 10 250000 139 -0.0356
head(fst.AUUK.50kb)
## # A tibble: 6 x 6
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST MEAN_FST
## <fct> <int> <int> <int> <dbl> <dbl>
## 1 10 1 50000 164 0.00792 0.0105
## 2 10 50001 100000 379 0.0595 0.0500
## 3 10 100001 150000 311 0.00728 -0.000459
## 4 10 150001 200000 120 0.0162 0.0275
## 5 10 200001 250000 121 0.126 0.0926
## 6 10 250001 300000 155 0.109 0.0747
# Move the Tajima's D window starts from 0, 50000, ... to 1, 50001, ... so
# they use the same BIN_START convention as the .fst and .pi tables.
TajD.UK.50kb <- mutate(TajD.UK.50kb, BIN_START = BIN_START + 1)
TajD.AU.50kb <- mutate(TajD.AU.50kb, BIN_START = BIN_START + 1)
TajD.US.50kb <- mutate(TajD.US.50kb, BIN_START = BIN_START + 1)
Now BIN_START should match. To join the tables with dplyr, we’ll need a column that specifies a unique position in the genome. Create a new column that joins “CHROM” and “BIN_START” (e.g. “10-50001”) so that we match each value based on actual position in the genome.
# Append a POS_ID key of the form "<CHROM>-<BIN_START>" to a window table.
add_pos_id <- function(tbl) {
  tbl$POS_ID <- paste(tbl$CHROM, tbl$BIN_START, sep = "-")
  tbl
}

fst.UKUS.50kb <- add_pos_id(fst.UKUS.50kb)
fst.AUUK.50kb <- add_pos_id(fst.AUUK.50kb)
fst.USAU.50kb <- add_pos_id(fst.USAU.50kb)
pi.UK.50kb <- add_pos_id(pi.UK.50kb)
pi.AU.50kb <- add_pos_id(pi.AU.50kb)
pi.US.50kb <- add_pos_id(pi.US.50kb)
TajD.UK.50kb <- add_pos_id(TajD.UK.50kb)
TajD.AU.50kb <- add_pos_id(TajD.AU.50kb)
TajD.US.50kb <- add_pos_id(TajD.US.50kb)
Now drop column names that we no longer need, otherwise we’ll have a giant table after all the merging.
# Strip the positional/count columns from every table except fst.UKUS.50kb,
# which is left untouched so the joined table keeps exactly one copy of
# CHROM, BIN_START, BIN_END and N_VARIANTS.
# select() is namespaced because attaching fitdistrplus later in this script
# loads MASS, whose select() masks dplyr's; without the prefix this chunk
# fails when re-run in the same session.
fst.AUUK.50kb <- fst.AUUK.50kb %>% dplyr::select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
fst.USAU.50kb <- fst.USAU.50kb %>% dplyr::select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.UK.50kb <- pi.UK.50kb %>% dplyr::select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.AU.50kb <- pi.AU.50kb %>% dplyr::select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.US.50kb <- pi.US.50kb %>% dplyr::select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
# The Tajima's D tables have no BIN_END, and their count column is N_SNPS.
TajD.UK.50kb <- TajD.UK.50kb %>% dplyr::select(-CHROM, -BIN_START, -N_SNPS)
TajD.AU.50kb <- TajD.AU.50kb %>% dplyr::select(-CHROM, -BIN_START, -N_SNPS)
TajD.US.50kb <- TajD.US.50kb %>% dplyr::select(-CHROM, -BIN_START, -N_SNPS)
We’ll join tables based on the unique position identifier (POS_ID). When two tables share a column name, the suffix argument tags each copy with its population; rename() (new name first) does the same for the last table joined in each group, whose columns arrive without a suffix. Heads up: each join creates a new, wider table; the input tables themselves are left unchanged.
# --- Fst ---------------------------------------------------------------
# The UK-US table is the left-hand side throughout, so its rows (and its
# CHROM/BIN_START/BIN_END/N_VARIANTS columns) define the final table.
fstUKUS.fstAUUK.50kb <- fst.UKUS.50kb %>%
  left_join(fst.AUUK.50kb, by = "POS_ID", suffix = c("_UKUS", "_AUUK"))
# No column names clash in this join, so the US-AU columns come through
# unsuffixed and are tagged by rename() instead.
fst.50kb <- fstUKUS.fstAUUK.50kb %>%
  left_join(fst.USAU.50kb, by = "POS_ID", suffix = c("", "_USAU")) %>%
  rename(WEIGHTED_FST_USAU = WEIGHTED_FST, MEAN_FST_USAU = MEAN_FST)

# --- pi ----------------------------------------------------------------
# PI (UK) and PI (US) clash on the second join and become PI_UK / PI_US;
# the AU column then arrives as plain PI and is renamed.
fst.50kb.piUK <- fst.50kb %>%
  left_join(pi.UK.50kb, by = "POS_ID")
fst.50kb.piUK.piUS <- fst.50kb.piUK %>%
  left_join(pi.US.50kb, by = "POS_ID", suffix = c("_UK", "_US"))
fst.pi.50kb <- fst.50kb.piUK.piUS %>%
  left_join(pi.AU.50kb, by = "POS_ID") %>%
  rename(PI_AU = PI)

# --- Tajima's D --------------------------------------------------------
# Same pattern as pi: UK/US get suffixes, AU is renamed afterwards.
fst.pi.50kb.TajDUK <- fst.pi.50kb %>%
  left_join(TajD.UK.50kb, by = "POS_ID")
fst.pi.50kb.TajDUK.TajDUS <- fst.pi.50kb.TajDUK %>%
  left_join(TajD.US.50kb, by = "POS_ID", suffix = c("_UK", "_US"))
div <- fst.pi.50kb.TajDUK.TajDUS %>%
  left_join(TajD.AU.50kb, by = "POS_ID") %>%
  rename(TajimaD_AU = TajimaD)
We’re going to drop data from: * small scaffolds (which conveniently start with ‘KQ’ or ‘LNCF’) and unplaced sequence (‘Unknown’), * any rows (positions) with missing data (NA), * and also coerce “negative” FST or pi to zero. We drop POS_ID (it is text, so it cannot be used as a number) and add a sequential numeric window index, SNP, in its place. For the qqman package, chromosomes need to be a numeric value, so we also recode the non-numeric chromosome names below (1A → 1.25, 1B → 1.75, 4A → 4.5, LG5 → 28, LGE22 → 29, Z → 0).
# Drop windows on small scaffolds (names containing "KQ" or "LNCF") and on
# unplaced sequence ("Unknown"), then drop every window with a missing value.
div <- div %>%
  filter(!grepl("KQ|LNCF|Unknown", CHROM)) %>%
  drop_na()

# Coerce negative Fst and pi estimates to zero. Columns are addressed by name
# (not by position) so this step keeps working if the join order changes.
nonneg.cols <- c("WEIGHTED_FST_UKUS", "MEAN_FST_UKUS",
                 "WEIGHTED_FST_AUUK", "MEAN_FST_AUUK",
                 "WEIGHTED_FST_USAU", "MEAN_FST_USAU",
                 "PI_UK", "PI_US", "PI_AU")
div[nonneg.cols] <- lapply(div[nonneg.cols], function(x) pmax(x, 0))

# qqman needs numeric chromosomes. Recode the non-numeric names in CHROM only:
# running gsub() over every column turns the whole table into text, and
# converting it back via is.factor() only works when data.frame() creates
# factors (the default before R 4.0).
chrom.recode <- c("1A" = "1.25", "1B" = "1.75", "4A" = "4.5",
                  "LG5" = "28", "LGE22" = "29", "Z" = "0")
chrom <- as.character(div$CHROM)
needs.recode <- chrom %in% names(chrom.recode)
chrom[needs.recode] <- unname(chrom.recode[chrom[needs.recode]])
# as.numeric() warns and yields NA for any name that is neither numeric nor
# listed in chrom.recode.
div$CHROM <- as.numeric(chrom)

# POS_ID was only needed as the join key. Finish with a plain data.frame whose
# integer columns are stored as doubles.
div <- div %>% dplyr::select(-POS_ID) %>% as.data.frame()
int.cols <- vapply(div, is.integer, logical(1))
div[int.cols] <- lapply(div[int.cols], as.numeric)

# Sequential window index, used as the "snp" identifier by manhattan().
div$SNP <- seq_len(nrow(div))
str(div)
## 'data.frame': 20071 obs. of 17 variables:
## $ CHROM : num 10 10 10 10 10 10 10 10 10 10 ...
## $ BIN_START : num 1 50001 100001 150001 200001 ...
## $ BIN_END : num 50000 100000 150000 200000 250000 300000 350000 400000 450000 500000 ...
## $ N_VARIANTS : num 165 377 311 120 121 156 80 101 24 18 ...
## $ WEIGHTED_FST_UKUS: num 0.01745 0.03751 0 0 0.00586 ...
## $ MEAN_FST_UKUS : num 0.0133 0.0272 0 0 0 ...
## $ WEIGHTED_FST_AUUK: num 0.00792 0.05948 0.00728 0.01624 0.12564 ...
## $ MEAN_FST_AUUK : num 0.0105 0.05 0 0.0275 0.0926 ...
## $ WEIGHTED_FST_USAU: num 0.01291 0.00258 0 0 0.00315 ...
## $ MEAN_FST_USAU : num 0.0093 0.00585 0 0 0 ...
## $ PI_UK : num 0.001053 0.002189 0.002212 0.000915 0.000879 ...
## $ PI_US : num 0.001063 0.002373 0.002003 0.000829 0.0007 ...
## $ PI_AU : num 0.001024 0.002211 0.0022 0.000804 0.000532 ...
## $ TajimaD_UK : num 0.808 0.293 0.877 1.426 0.968 ...
## $ TajimaD_US : num 0.42348 0.58012 0.80239 1.05163 -0.00424 ...
## $ TajimaD_AU : num 0.583 0.372 0.836 1.557 -0.557 ...
## $ SNP : int 1 2 3 4 5 6 7 8 9 10 ...
Adding in a calculation here so that we don’t have to repeat it when filtering div table.
# Per-window difference in nucleotide diversity, UK minus each other
# population; a positive value means pi is higher in the UK.
div[["piUK.piAU"]] <- with(div, PI_UK - PI_AU)
div[["piUK.piUS"]] <- with(div, PI_UK - PI_US)
Now we’re ready to plot and calculate genome-wide values!
First, we look at the distribution of variation across the genome.
# Axis labels for the chromosomes, in the order of the numeric CHROM codes
# (Z was recoded to 0, 1A to 1.25, 1B to 1.75, 4A to 4.5).
fst.chrlabs <- c("Z", 1, "1A", "1B", 2, 3, 4, "4A", 5:29)

# Draw a Manhattan plot of one windowed Fst column.
#   fst.col  name of the column in `data` to plot on the y axis (raw values,
#            not -log10).
#   pdf.file if NULL, draw on the current device; otherwise write a 12 x 3 in
#            PDF to this path and close it again, even if plotting fails.
#   data     table with CHROM, BIN_START, SNP and `fst.col`.
fst_manhattan <- function(fst.col, pdf.file = NULL, data = div) {
  if (!is.null(pdf.file)) {
    pdf(pdf.file, width = 12, height = 3)
    on.exit(dev.off(), add = TRUE)
  }
  manhattan(data, chr = "CHROM", bp = "BIN_START", snp = "SNP", p = fst.col,
            ylim = c(0, 0.41), ylab = NA, xlab = NA, logp = FALSE,
            col = c("grey45", "grey65"), cex = 1, cex.axis = 1,
            chrlabs = fst.chrlabs)
}

# Each comparison is drawn once on screen and once to PDF.
fst_manhattan("WEIGHTED_FST_UKUS")
fst_manhattan("WEIGHTED_FST_UKUS",
              "/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.UKUS.Manhattan.pdf")

fst_manhattan("WEIGHTED_FST_AUUK")
fst_manhattan("WEIGHTED_FST_AUUK",
              "/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.AUUK.Manhattan.pdf")

fst_manhattan("WEIGHTED_FST_USAU")
# NOTE(review): unlike the two PDFs above, this one is not written into
# manhattans_for_pub/ -- confirm whether that is intentional.
fst_manhattan("WEIGHTED_FST_USAU",
              "/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.USAU.Manhattan.pdf")
quantile(div$WEIGHTED_FST_AUUK, c(.9,.95,.99,.999))
## 90% 95% 99% 99.9%
## 0.0676462 0.0862444 0.1490122 0.3077638
quantile(div$WEIGHTED_FST_UKUS, c(.9,.95,.99,.999))
## 90% 95% 99% 99.9%
## 0.0441469 0.0609766 0.1156152 0.2228447
mean(div$WEIGHTED_FST_AUUK) + 5*sd(div$WEIGHTED_FST_AUUK)
## [1] 0.1936444
mean(div$WEIGHTED_FST_UKUS) + 5*sd(div$WEIGHTED_FST_UKUS)
## [1] 0.1406712
# Outlier windows: weighted Fst above the genome-wide 99th percentile.
div.outliers.AUUK <- subset(div, WEIGHTED_FST_AUUK > quantile(WEIGHTED_FST_AUUK, .99))
div.outliers.USUK <- subset(div, WEIGHTED_FST_UKUS > quantile(WEIGHTED_FST_UKUS, .99))
# "High Fst" windows: weighted Fst above a fixed cut-off of 0.1.
div.hifst.AUUK <- subset(div, WEIGHTED_FST_AUUK > 0.1)
div.hifst.UKUS <- subset(div, WEIGHTED_FST_UKUS > 0.1)
# Which chromosomes carry UK-US outliers, and how many windows are there?
unique(div.outliers.USUK$CHROM)
## [1] 11.00 12.00 13.00 17.00 19.00 1.25 1.00 28.00 2.00 3.00 4.50 4.00
## [13] 6.00 29.00 0.00
nrow(div.outliers.USUK)
## [1] 201
write.csv(div.outliers.USUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.USUK.csv")
# Same questions for the AU-UK outliers.
unique(div.outliers.AUUK$CHROM)
## [1] 10.00 12.00 13.00 17.00 18.00 1.25 1.00 23.00 27.00 2.00 3.00 4.50
## [13] 4.00 5.00 6.00 7.00 8.00 0.00
nrow(div.outliers.AUUK)
## [1] 201
write.csv(div.outliers.AUUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.AUUK.csv")
What’s going on w/ other metrics at these outliers?
summary(div.outliers.AUUK$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0000730 0.0001890 0.0009734 0.0014188 0.0057224
summary(div.outliers.AUUK$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.383e-05 7.067e-05 2.285e-04 9.324e-04 1.447e-03 5.313e-03
summary(div.outliers.AUUK$TajimaD_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.3050 0.2574 0.7501 0.6479 1.2528 2.6922
summary(div.outliers.AUUK$TajimaD_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.2937 0.1481 0.6493 0.5926 1.0098 2.3588
summary(div.outliers.USUK$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.167e-06 6.250e-05 2.890e-04 8.204e-04 1.281e-03 5.202e-03
summary(div.outliers.USUK$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000210 0.0000765 0.0002912 0.0008650 0.0012335 0.0058409
summary(div.outliers.USUK$TajimaD_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1488 -0.4076 0.4343 0.3082 0.9422 2.9728
summary(div.outliers.USUK$TajimaD_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9252 0.3047 0.8963 0.8645 1.4612 2.5895
library(fitdistrplus)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survival
descdist(div.outliers.AUUK$PI_UK)
## summary statistics
## ------
## min: 2.5e-06 max: 0.00572238
## median: 0.000189
## mean: 0.0009734123
## estimated sd: 0.001348425
## estimated skewness: 1.597318
## estimated kurtosis: 4.653
descdist(div.outliers.AUUK$PI_AU)
## summary statistics
## ------
## min: 1.38333e-05 max: 0.00531275
## median: 0.0002285
## mean: 0.0009324382
## estimated sd: 0.001252321
## estimated skewness: 1.532558
## estimated kurtosis: 4.458218
descdist(div.outliers.USUK$PI_UK)
## summary statistics
## ------
## min: 2.1e-05 max: 0.00584092
## median: 0.000291167
## mean: 0.000865046
## estimated sd: 0.001150262
## estimated skewness: 1.693987
## estimated kurtosis: 5.349374
descdist(div.outliers.USUK$PI_US)
## summary statistics
## ------
## min: 7.16667e-06 max: 0.00520175
## median: 0.000289007
## mean: 0.0008203641
## estimated sd: 0.001078227
## estimated skewness: 1.65203
## estimated kurtosis: 5.131592
#beta.outliers.AUUK <- fitdist(div.outliers.AUUK$PI_AU, "beta")
#summary(beta.outliers.AUUK)
#beta.outliers.USUK <- fitdist(div.outliers.USUK$PI_US, "beta")
#summary(beta.outliers.USUK)
# AU-UK outlier windows in which the two invasive populations barely differ
# from each other (US-AU weighted Fst below 0.01).
div.outliers.AUUK.lowFstUSAU <- subset(div.outliers.AUUK, WEIGHTED_FST_USAU < 0.01)
unique(div.outliers.AUUK.lowFstUSAU$CHROM)
## [1] 10.00 12.00 13.00 17.00 1.25 4.50 6.00 0.00
nrow(div.outliers.AUUK.lowFstUSAU)
## [1] 21
# The same filter applied to the UK-US outlier windows.
div.outliers.USUK.lowFstUSAU <- subset(div.outliers.USUK, WEIGHTED_FST_USAU < 0.01)
unique(div.outliers.USUK.lowFstUSAU$CHROM)
## [1] 12.00 1.25 1.00 4.50 4.00 6.00 0.00
nrow(div.outliers.USUK.lowFstUSAU)
## [1] 39
# Chromosomes that appear in both filtered sets.
intersect(unique(div.outliers.AUUK.lowFstUSAU$CHROM),unique(div.outliers.USUK.lowFstUSAU$CHROM))
## [1] 12.00 1.25 4.50 6.00 0.00
Possible parallel “selection” ?
What’s the statistical distribution of these values?
descdist(div$WEIGHTED_FST_AUUK)
## summary statistics
## ------
## min: 0 max: 0.401902
## median: 0.0265902
## mean: 0.03258447
## estimated sd: 0.03221199
## estimated skewness: 2.902543
## estimated kurtosis: 19.80308
descdist(div$WEIGHTED_FST_UKUS)
## summary statistics
## ------
## min: 0 max: 0.34149
## median: 0.0125022
## mean: 0.01873338
## estimated sd: 0.02438756
## estimated skewness: 3.325395
## estimated kurtosis: 22.98486
descdist(div$WEIGHTED_FST_USAU)
## summary statistics
## ------
## min: 0 max: 0.482831
## median: 0.0341419
## mean: 0.04037524
## estimated sd: 0.03744596
## estimated skewness: 3.350824
## estimated kurtosis: 25.99446
# Stack the two native-vs-invasive Fst columns into long format: one row per
# window per comparison, labelled by comparison.
lab.AU <- rep("AU.UK", nrow(div))
lab.US <- rep("UK.US", nrow(div))
Fst.group <- c(lab.AU, lab.US)
# The UK-US column is WEIGHTED_FST_UKUS. A misspelt name returns NULL from
# `$`, and data.frame() would then silently recycle the AU-UK values under
# the "UK.US" label, so the lengths are checked explicitly.
Fst.hist.data <- c(div$WEIGHTED_FST_AUUK, div$WEIGHTED_FST_UKUS)
stopifnot(length(Fst.hist.data) == length(Fst.group))
Fst.hist <- data.frame(Fst = Fst.hist.data, population = Fst.group)

# Overlaid density curves of weighted Fst for the two comparisons. Fill
# colours are named so they cannot be swapped by factor-level order.
Fst.density.plot <- ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
  theme_classic() +
  geom_density(alpha=0.5,lwd=0.5) +
  scale_fill_manual(values=c("AU.UK"="#F2C14E","UK.US"="#2c81a8")) + xlim(0,0.5) +
  xlab("Fst") + labs(fill="Population") +
  # Hard-coded reference lines, coloured to match the two comparisons.
  geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)

# Write the figure to PDF, then show the same plot on screen.
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Fst.pdf",width=4,height=3)
print(Fst.density.plot)
dev.off()
## quartz_off_screen
## 2
print(Fst.density.plot)
# Scatter of invasive-range pi against native-vs-invasive weighted Fst, with a
# loess trend line. Columns are referenced by bare name inside aes();
# `div$...` inside aes() is what ggplot2 warns about and breaks if the plot's
# data are ever swapped. Axis titles are left blank on purpose
# (intended titles: "Fst (Native vs. Invasive)" / "Pi Invasive").

# UK vs US
FstPi.plot.USUK <- ggplot(div, aes(x=WEIGHTED_FST_UKUS, y=PI_US)) +
  geom_point(col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") +
  stat_smooth(method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic()
print(FstPi.plot.USUK)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
print(FstPi.plot.USUK)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2

# UK vs AU
FstPi.plot.AUUK <- ggplot(div, aes(x=WEIGHTED_FST_AUUK, y=PI_AU)) +
  geom_point(col="#F2C14E",cex=0.7) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
  stat_smooth(method="loess",col="black",lwd=0.5) +
  xlab("") + ylab("")
print(FstPi.plot.AUUK)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.AUUK.pdf",height=2,width=2)
print(FstPi.plot.AUUK)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
summary(div$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0027682 0.0040090 0.0038645 0.0050565 0.0138998
summary(div$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0028343 0.0041312 0.0039757 0.0051943 0.0141869
summary(div$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.667e-06 2.753e-03 4.016e-03 3.863e-03 5.045e-03 1.409e-02
descdist(div$PI_US)
## summary statistics
## ------
## min: 4.66667e-06 max: 0.0140869
## median: 0.00401555
## mean: 0.003862564
## estimated sd: 0.001783127
## estimated skewness: -0.1258447
## estimated kurtosis: 3.05688
#norm.piUS.fit <- fitdist(div$PI_US,distr="norm")
#summary(norm.piUS.fit)
descdist(div$PI_UK)
## summary statistics
## ------
## min: 2.5e-06 max: 0.0141869
## median: 0.00413121
## mean: 0.003975727
## estimated sd: 0.001837387
## estimated skewness: -0.1188124
## estimated kurtosis: 3.055468
#norm.piUK.fit <- fitdist(div$PI_UK,distr="norm")
#summary(norm.piUK.fit)
descdist(div$PI_AU)
## summary statistics
## ------
## min: 2.5e-06 max: 0.0138998
## median: 0.00400898
## mean: 0.003864496
## estimated sd: 0.001775347
## estimated skewness: -0.1396863
## estimated kurtosis: 3.030743
#norm.piAU.fit <- fitdist(div$PI_AU,distr="norm")
#summary(norm.piAU.fit)
# Stack pi for the three populations into long format. The label vector is
# built in the SAME order as the data vector (UK, US, AU); putting them in
# different orders attaches the "AU" label to the UK values and vice versa.
lab.UK <- rep("UK", nrow(div))
lab.US <- rep("US", nrow(div))
lab.AU <- rep("AU", nrow(div))
group <- c(lab.UK, lab.US, lab.AU)
pi.hist.data <- c(div$PI_UK, div$PI_US, div$PI_AU)
stopifnot(length(pi.hist.data) == length(group))
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
# Re-knit to refresh the printed structure; earlier output predates the fix.
str(pi.hist.lab)

# Fill colours are keyed by population name so that each density matches the
# colour of its mean line below (AU "#F2C14E", US "#2c81a8"); UK takes the
# remaining colour, black.
pi.fill <- c(UK = "black", US = "#2c81a8", AU = "#F2C14E")

# Overlaid densities of per-window pi, with dashed lines at the genome-wide
# mean pi of the two invasive populations.
pi.density.plot <- ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=pi.fill) + xlim(-0.0001,0.02) +
  xlab("Pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  theme(legend.position="none")

# Show on screen, then write the same plot to PDF.
print(pi.density.plot)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Pi.pdf",width=4,height=3)
print(pi.density.plot)
dev.off()
## quartz_off_screen
## 2
Average nucleotide diversity for both invasions is essentially the same (mean pi ≈ 0.0039 in both AU and US; see the summaries above), so the two vertical mean lines overlaid in the plot above nearly coincide.
# Axis-text styling shared by the small (2 x 2 in) scatter plots below.
small.axis.text <- theme(axis.text=element_text(size=7,colour="black"))

# Genome-wide: pi in the US against pi in the UK, one point per window, with
# a loess trend line.
Pi.plot.USvsUK <- ggplot(div, aes(x=PI_UK, y=PI_US)) +
  geom_point(col="#2c81a8",cex=0.7) +
  stat_smooth(span=0.2,method="loess",col="black",lwd=0.5) +
  labs(x="", y="") + xlim(0,0.02) + ylim(0,0.02) +
  theme_classic() + small.axis.text
print(Pi.plot.USvsUK)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_USvsUK.pdf",width=2,height=2)
print(Pi.plot.USvsUK)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2

# The same comparison restricted to the UK-US Fst outlier windows (note the
# tighter axis limits).
PiOutliers.plot.USvsUK <- ggplot(div.outliers.USUK, aes(x=PI_UK, y=PI_US)) +
  geom_point(col="#2c81a8",cex=0.7) +
  stat_smooth(span=0.2,method="loess",col="black",lwd=0.5) +
  labs(x="", y="") + xlim(0,0.01) + ylim(0,0.01) +
  theme_classic() + small.axis.text
print(PiOutliers.plot.USvsUK)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/PiOutliers_USvsUK.pdf",width=2,height=2)
print(PiOutliers.plot.USvsUK)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2

# Genome-wide: pi in AU against pi in the UK.
Pi.plot.AUvsUK <- ggplot(div, aes(x=PI_UK, y=PI_AU)) +
  geom_point(col="#F2C14E",cex=0.7) +
  stat_smooth(span=0.2,method="loess",col="black",lwd=0.5) +
  labs(x="", y="") + xlim(0,0.02) + ylim(0,0.02) +
  theme_classic() + small.axis.text
print(Pi.plot.AUvsUK)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
print(Pi.plot.AUvsUK)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
# pi in AU against pi in the UK, restricted to the AU-UK Fst outlier windows,
# with a loess trend line.
PiOutliers.plot.AUvsUK <- ggplot(data=div.outliers.AUUK) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)

# On-screen copy. No dev.off() follows it: no file device is open at this
# point, so closing one here would only shut the screen device.
print(PiOutliers.plot.AUvsUK)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).

# PDF copy. The file is named PiOutliers_AUvsUK.pdf (parallel to
# PiOutliers_USvsUK.pdf); writing to Pi_AUvsUK.pdf would overwrite the
# genome-wide AU-vs-UK figure saved earlier under that name.
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/PiOutliers_AUvsUK.pdf",width=2,height=2)
print(PiOutliers.plot.AUvsUK)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
descdist(div$TajimaD_US)
## summary statistics
## ------
## min: -2.1488 max: 3.16509
## median: 0.744616
## mean: 0.7115798
## estimated sd: 0.3466121
## estimated skewness: -1.309557
## estimated kurtosis: 13.47233
descdist(div$TajimaD_UK)
## summary statistics
## ------
## min: -2.2276 max: 3.23445
## median: 0.737875
## mean: 0.7152536
## estimated sd: 0.3263563
## estimated skewness: -0.4741872
## estimated kurtosis: 12.17869
descdist(div$TajimaD_AU)
## summary statistics
## ------
## min: -2.33194 max: 3.16824
## median: 0.798065
## mean: 0.7829885
## estimated sd: 0.3372602
## estimated skewness: -0.5499719
## estimated kurtosis: 12.87531
descdist(div.outliers.USUK$TajimaD_US)
## summary statistics
## ------
## min: -2.1488 max: 2.9728
## median: 0.434343
## mean: 0.3082123
## estimated sd: 1.017184
## estimated skewness: -0.2567428
## estimated kurtosis: 2.728269
descdist(div.outliers.AUUK$TajimaD_AU)
## summary statistics
## ------
## min: -2.30499 max: 2.69219
## median: 0.750086
## mean: 0.6478708
## estimated sd: 0.8718095
## estimated skewness: -0.7820734
## estimated kurtosis: 3.98327
What’s the difference in diversity between native and invasive ranges?
When comparing ancestral diversity to a bottlenecked invasive population, we’d expect to see lower diversity in the invasive range overall. This difference would be more pronounced in regions that had differentiated from the native range (e.g., where drift and/or selection are more pronounced).
Here, difference in pi > 0 means that diversity is higher in the native range than in the invasive.
summary(div$piUK.piAU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.552e-03 -1.935e-05 9.278e-05 1.112e-04 2.319e-04 2.028e-03
qqnorm(div$piUK.piAU, pch = 16)
qqline(div$piUK.piAU, pch = 16)
descdist(div$piUK.piAU)
## summary statistics
## ------
## min: -0.00155244 max: 0.00202796
## median: 9.278e-05
## mean: 0.0001112317
## estimated sd: 0.0002326892
## estimated skewness: 0.4570582
## estimated kurtosis: 5.950369
summary(div$piUK.piUS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.381e-03 -2.535e-06 1.035e-04 1.132e-04 2.277e-04 1.762e-03
qqnorm(div$piUK.piUS, pch = 16)
qqline(div$piUK.piUS, pch = 16)
descdist(div$piUK.piUS)
## summary statistics
## ------
## min: -0.00138117 max: 0.00176181
## median: 0.000103491
## mean: 0.0001131636
## estimated sd: 0.0002065841
## estimated skewness: 0.1527603
## estimated kurtosis: 5.842408
# Logical masks over the rows of `div`: is pi higher in the UK than in the
# given invasive population?
hiUKpi.vsAU <- div$piUK.piAU > 0
hiUKpi.vsUS <- div$piUK.piUS > 0

div.hiUKpi.vsAU <- div[which(hiUKpi.vsAU),]
div.hiUKpi.vsUS <- div[which(hiUKpi.vsUS),]
# Both masks are combined on `div` itself. Indexing the vsAU subset with row
# numbers computed on `div` selects unrelated (and out-of-range, i.e. NA)
# rows and simply reproduces the vsUS count.
div.hiUKpi.both <- div[which(hiUKpi.vsAU & hiUKpi.vsUS),]

nrow(div.hiUKpi.vsAU)/nrow(div) # fraction of windows that have higher pi in native
## [1] 0.7049973
nrow(div.hiUKpi.vsUS)/nrow(div)
## [1] 0.7432116
nrow(div.hiUKpi.both)/nrow(div) # fraction of windows with higher pi in native for both invasive ranges
# (re-run to obtain this value; the figure recorded before the fix, 0.7432116,
# was an artifact of the indexing error)
If drift acting on novel mutations explains most of the differentiation, then where FST is high, diversity should also be higher in the invasions. Could also be due to weaker purifying selection during population expansion.
descdist(div.hifst.AUUK$piUK.piAU)
## summary statistics
## ------
## min: -0.00119697 max: 0.00202796
## median: 3.283325e-05
## mean: 0.0001246263
## estimated sd: 0.0004012797
## estimated skewness: 0.807314
## estimated kurtosis: 4.950103
# High-Fst (AU-UK) windows in which pi is higher in the UK than in AU.
div.hifst.AUUK.hiUKpi <- subset(div.hifst.AUUK, piUK.piAU > 0)
nrow(div.hifst.AUUK.hiUKpi)/nrow(div.hifst.AUUK) # % of high FST windows that have higher pi in native
## [1] 0.6224189
descdist(div.hifst.UKUS$piUK.piUS)
## summary statistics
## ------
## min: -0.00119758 max: 0.00176181
## median: 2.5496e-05
## mean: 6.588734e-05
## estimated sd: 0.0003507603
## estimated skewness: 0.6636127
## estimated kurtosis: 6.832871
# High-Fst (UK-US) windows in which pi is higher in the UK than in the US.
div.hifst.UKUS.hiUKpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS > 0),]
nrow(div.hifst.UKUS.hiUKpi)/nrow(div.hifst.UKUS)
## [1] 0.6989619
# Windows present in BOTH high-Fst/higher-UK-pi sets, matched on the window
# index SNP. Row numbers computed on the AU-UK subset cannot be used to index
# the UK-US subset: the two tables hold different windows.
# NOTE(review): assumes "both" means high Fst and higher UK pi in both
# comparisons -- confirm. This reuses (overwrites) the name div.hiUKpi.both.
div.hiUKpi.both <- div.hifst.UKUS.hiUKpi[div.hifst.UKUS.hiUKpi$SNP %in% div.hifst.AUUK.hiUKpi$SNP,]
nrow(div.hiUKpi.both)
# (re-run to obtain this count; the 295 recorded before the fix came from the
# mismatched indexing)
# High-Fst windows in which pi is instead higher in the invasive population.
div.hifst.AUUK.hiAUpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU < 0),]
div.hifst.UKUS.hiUSpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS < 0),]
# Stack the two pi differences (UK minus invasive) into long format. Labels
# are concatenated in the SAME order as the data (US first, then AU);
# otherwise every US value is labelled "AU" and vice versa.
lab.AU <- rep("AU", nrow(div))
lab.US <- rep("US", nrow(div))
group <- c(lab.US, lab.AU)
pi.hist.data <- c(div$piUK.piUS, div$piUK.piAU)
stopifnot(length(pi.hist.data) == length(group))
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
# Re-knit to refresh the printed structure; earlier output predates the fix.
str(pi.hist.lab)

# Fill colours keyed by population name, matching the mean lines below
# (AU "#F2C14E", US "#2c81a8").
pi.diff.fill <- c(AU = "#F2C14E", US = "#2c81a8")

# Density of the per-window difference in pi, with thin black reference lines
# at 0 and +/- 0.0005 and a thick coloured line at each population's mean.
pi.diff.plot <- ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=pi.diff.fill) +
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")

# Show on screen, then write the same plot to PDF.
print(pi.diff.plot)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference.pdf",width=4,height=3)
print(pi.diff.plot)
dev.off()
## quartz_off_screen
## 2
# Same long-format stack, restricted to high-Fst windows. The two subsets
# have different numbers of rows, so the labels must follow the data order
# exactly (US block first, then AU block); otherwise part of one population's
# windows is counted under the other.
lab.AU <- rep("AU", nrow(div.hifst.AUUK))
lab.US <- rep("US", nrow(div.hifst.UKUS))
group <- c(lab.US, lab.AU)
pi.hist.data <- c(div.hifst.UKUS$piUK.piUS, div.hifst.AUUK$piUK.piAU)
stopifnot(length(pi.hist.data) == length(group))
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
# Re-knit to refresh the printed structure; earlier output predates the fix.
str(pi.hist.lab)

# Fill colours keyed by population name, matching the mean lines below
# (AU "#F2C14E", US "#2c81a8").
pi.diff.hifst.fill <- c(AU = "#F2C14E", US = "#2c81a8")

# Density of the difference in pi within high-Fst windows, with thin black
# reference lines at 0 and +/- 0.0005 and a thick coloured line at each mean.
pi.diff.hifst.plot <- ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=pi.diff.hifst.fill) +
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")

# Show on screen, then write the same plot to PDF.
print(pi.diff.hifst.plot)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference_HiFst.pdf",width=4,height=3)
print(pi.diff.hifst.plot)
dev.off()
## quartz_off_screen
## 2
Are regions with novel pi also highly differentiated? Expect this scatterplot to look bimodal, where shifts in diversity in either direction led to differentiation between populations.
# UK vs US: per-window FST against the UK-minus-US difference in pi, with a
# loess trend line. Axis titles are deliberately left blank.
ggplot(div, aes(x = WEIGHTED_FST_UKUS, y = piUK.piUS)) +
  geom_point(colour = "#2c81a8", size = 0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  labs(x = "", y = "") +
  stat_smooth(method = "loess", colour = "black", lwd = 0.5) +
  scale_x_continuous(limits = c(0, 0.31)) +
  scale_y_continuous(limits = c(-0.002, 0.002)) +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# AU vs UK: per-window FST against the UK-minus-AU difference in pi, with a
# loess trend line. Axis titles are deliberately left blank.
ggplot(div, aes(x = WEIGHTED_FST_AUUK, y = piUK.piAU)) +
  geom_point(colour = "#F2C14E", size = 0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  labs(x = "", y = "") +
  stat_smooth(method = "loess", colour = "black", lwd = 0.5) +
  scale_x_continuous(limits = c(0, 0.31)) +
  scale_y_continuous(limits = c(-0.002, 0.002)) +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
# FST vs. difference in pi for both invasions on one panel (US blue, AU
# yellow), with 2-D density contours over the points.
# The plot is built once and then drawn twice (screen, then PDF) so the saved
# figure is guaranteed to match what was inspected.
fst.pi.density.plot <- ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
  xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
  # AU contours are drawn in white, US contours in blue
  geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
  geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
  guides(col = guide_legend(label = TRUE, label.position = "bottom",
                            direction = "horizontal"))
fst.pi.density.plot
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.pdf",height=5,width=5)
print(fst.pi.density.plot)
dev.off()
## quartz_off_screen
## 2
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
#dev.off()
In a given window, if diversity in the invasive range is higher than that of the native range, it is possible that those variants are novel mutations. This filtering will tell us whether we should look at particular genotypes in these regions.
# Windows where pi is higher in the invasive range than in the UK
# (negative UK-minus-invasive difference): first in both invasions, then in
# the US only, then in AU only. Each ratio is the share of all windows in div.
div.novelpi <- filter(div, piUK.piAU < 0 & piUK.piUS < 0)
# % of low native diversity SNPs are higher in diversity in both invasions
# sanity check based on calc above
nrow(div.novelpi) / nrow(div)
## [1] 0.1386578
div.novelUSpi <- filter(div, piUK.piUS < 0)
nrow(div.novelUSpi) / nrow(div)
## [1] 0.2566888
div.novelAUpi <- filter(div, piUK.piAU < 0)
nrow(div.novelAUpi) / nrow(div)
## [1] 0.2948533
How to test whether novel pi is evenly distributed across the genome? If even, then we expect to see a random sampling of chromosomes represented in this smaller dataset.
# Which chromosome codes appear among the higher-invasive-pi windows, and how
# the counts of those windows compare with the high-FST window sets.
unique(div.novelpi[["CHROM"]]) # which chromosomes have higher invasive pi in both invasions
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
unique(div.novelUSpi[["CHROM"]]) # just in US
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
unique(div.novelAUpi[["CHROM"]]) # in AU
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 18.00 19.00 1.25 1.75
## [13] 1.00 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00
## [25] 4.50 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
# share of chromosomes represented among windows higher in both invasions
n_distinct(div.novelpi[["CHROM"]]) / n_distinct(div[["CHROM"]])
## [1] 0.969697
n_distinct(div.novelUSpi[["SNP"]]) # are most of these higher invasive pi regions also the moderately differentiated windows
## [1] 5152
n_distinct(div.hifst.UKUS[["SNP"]])
## [1] 289
n_distinct(div.hifst.UKUS[["SNP"]]) / n_distinct(div.novelUSpi[["SNP"]])
## [1] 0.05609472
n_distinct(div.novelAUpi[["SNP"]])
## [1] 5918
n_distinct(div.hifst.AUUK[["SNP"]])
## [1] 678
n_distinct(div.hifst.AUUK[["SNP"]]) / n_distinct(div.novelAUpi[["SNP"]])
## [1] 0.1145657
This is a different calculation than asking whether differentiated windows are also high in invasive diversity, since we’re drawing from different sets.
Can we color points in the Manhattan plot by the difference in diversity?
# Window ids ("SNP") to highlight: all higher-invasive-pi windows, and the
# subsets that are also in the high-FST sets.
SNP.novelUSpi <- div.novelUSpi[["SNP"]]
SNP.novelAUpi <- div.novelAUpi[["SNP"]]
SNP.novelAUpi.hifstonly <- div.hifst.AUUK.hiAUpi[["SNP"]]
SNP.novelUSpi.hifstonly <- div.hifst.UKUS.hiUSpi[["SNP"]]
nrow(div.hifst.UKUS.hiUSpi)
## [1] 87
nrow(div.hifst.AUUK.hiAUpi)
## [1] 256
# Genome-wide Manhattan plot of UK-vs-US FST (plotted raw, not -log10),
# highlighting high-FST windows with higher US pi.
manhattan(
  div,
  chr = "CHROM", bp = "BIN_START", snp = "SNP", p = "WEIGHTED_FST_UKUS",
  logp = FALSE,
  ylim = c(0, 0.41), xlab = NA, ylab = NA,
  col = c("grey45", "grey65"),
  highlight = SNP.novelUSpi.hifstonly,
  cex = 1, cex.axis = 1,
  chrlabs = c("Z", "1", "1A", "1B", "2", "3", "4", "4A", as.character(5:29))
)
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.UKUS.Manhattan.pdf",w=12,h=3)
#dev.off()
# Same plot for AU-vs-UK FST, highlighting high-FST windows with higher AU pi.
manhattan(
  div,
  chr = "CHROM", bp = "BIN_START", snp = "SNP", p = "WEIGHTED_FST_AUUK",
  logp = FALSE,
  ylim = c(0, 0.41), xlab = NA, ylab = NA,
  col = c("grey45", "grey65"),
  highlight = SNP.novelAUpi.hifstonly,
  cex = 1, cex.axis = 1,
  chrlabs = c("Z", "1", "1A", "1B", "2", "3", "4", "4A", as.character(5:29))
)
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.AUUK.Manhattan.pdf",w=12,h=3)
#dev.off()
# AU-vs-UK FST outlier windows in which pi is higher in AU than in the UK
# (negative UK-minus-AU difference): count, then share of all AUUK outliers.
div.outliers.hiAUpi <- subset(div.outliers.AUUK, piUK.piAU < 0)
nrow(div.outliers.hiAUpi)
## [1] 91
nrow(div.outliers.hiAUpi) / nrow(div.outliers.AUUK)
## [1] 0.4527363
45% of FST outlier windows (91 of 201) have higher diversity in AU than in the UK.
# UK-vs-US FST outlier windows in which pi is higher in the US than in the UK
# (negative UK-minus-US difference): count, then share of all USUK outliers.
div.outliers.hiUSpi <- subset(div.outliers.USUK, piUK.piUS < 0)
nrow(div.outliers.hiUSpi)
## [1] 61
nrow(div.outliers.hiUSpi) / nrow(div.outliers.USUK)
## [1] 0.3034826
30% of FST outlier windows (61 of 201) have higher diversity in the US than in the UK.
# Chromosome codes carrying higher-invasive-pi FST outliers: those shared by
# both invasions first, then the AU and US sets separately.
intersect(div.outliers.hiUSpi[["CHROM"]], div.outliers.hiAUpi[["CHROM"]])
## [1] 12.00 1.25 1.00 2.00 3.00 6.00 0.00
unique(div.outliers.hiAUpi[["CHROM"]])
## [1] 12.00 13.00 1.25 1.00 23.00 27.00 2.00 3.00 4.50 5.00 6.00 0.00
unique(div.outliers.hiUSpi[["CHROM"]])
## [1] 12.00 17.00 1.25 1.00 2.00 3.00 4.00 6.00 0.00
The plots below are based on code from Gemma Clucas.
# Chromosome 2: subset the windows, draw per-chromosome Manhattan plots, then
# pick the window range used for the zoomed figure.
chrom2.div <- div[which(div$CHROM==2),]
length(chrom2.div$SNP) # how many windows total (need for calculating distance to centromere)
## [1] 2997
# The highlight vectors hold window ids from the whole genome. Restrict them
# to ids present in this subset so manhattan() is not asked to highlight
# windows it cannot find (which makes it warn).
manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelAUpi.hifstonly, chrom2.div$SNP),
          cex=1,cex.axis=1)
manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelUSpi.hifstonly, chrom2.div$SNP),
          cex=1,cex.axis=1)
# Zoom region: windows with 8600 < SNP < 8980 (both bounds exclusive).
chrom2.div.small <- chrom2.div[which(chrom2.div$SNP > 8600 & chrom2.div$SNP < 8980),]
# 39200001 to 51650000
# Zoomed two-panel figure for the chromosome 2 region in chrom2.div.small.
# Top panel: windowed FST for the three population pairs (left axis) with pi
# for each population drawn over it (right axis). Bottom panel: Tajima's D.
# Series are layered with par(new=TRUE); each axis() call annotates the
# coordinate system set up by the most recent plot().
# Colours: AU "#F2C14E", US "#2c81a8", UK "#39C855", US-vs-AU FST "grey50".
quartz(height=5,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
par(mfrow=c(2,1)) # set rows
par(mar=c(0,2,0.5,2)) # set margins for each plot
# --- top panel: FST layers, all on ylim c(-0.2, 0.5) ---
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.5))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
# --- top panel: pi layers, all on ylim c(0, 0.04) ---
par(new=TRUE)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.04))
par(new=TRUE)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, col="#39C855", lwd=1)
abline(v=76290000, col="black", lwd=0.5) # centromere position
# --- bottom panel: Tajima's D layers, all on ylim c(-2.4, 3.2) ---
par(mar=c(1,2,1,2))
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.2))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.2))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=TRUE)
# axes=FALSE here as in the other layers; the axes are added explicitly just
# below, so leaving plot() to draw them as well would draw them twice.
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.2))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.2)) # tajima's D axis
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome2.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
# Chromosome 6: subset the windows, draw per-chromosome Manhattan plots, then
# pick the window range used for the zoomed figure.
chrom6.div <- div[which(div$CHROM==6),]
# runs from 16155 to 16871
# chrom 6 = 716 50kb windows ("SNPs" here)
# The highlight vectors hold window ids from the whole genome. Restrict them
# to ids present in this subset so manhattan() is not asked to highlight
# windows it cannot find (which makes it warn).
manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelAUpi.hifstonly, chrom6.div$SNP),
          cex=1,cex.axis=1)
manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelUSpi.hifstonly, chrom6.div$SNP),
          cex=1,cex.axis=1)
# Zoom region: windows with 16225 < SNP < 16325 (both bounds exclusive).
chrom6.div.small <- chrom6.div[which(chrom6.div$SNP > 16225 & chrom6.div$SNP < 16325),]
head(chrom6.div.small)
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 16226 6 3550001 3600000 570 0.01206770 0.008669440
## 16227 6 3600001 3650000 528 0.03079550 0.023766500
## 16228 6 3650001 3700000 701 0.02597620 0.020965100
## 16229 6 3700001 3750000 805 0.00338468 0.000400509
## 16230 6 3750001 3800000 830 0.02082790 0.017915200
## 16231 6 3800001 3850000 800 0.00589529 0.001372410
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 16226 0.0367649 0.0305788 0.0392762 0.0368076
## 16227 0.0501026 0.0406660 0.0739191 0.0613789
## 16228 0.0711590 0.0593933 0.0454643 0.0368937
## 16229 0.0754966 0.0604820 0.0708642 0.0562380
## 16230 0.0375712 0.0322166 0.0438655 0.0358050
## 16231 0.1001270 0.0672706 0.0743417 0.0579428
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU SNP
## 16226 0.00366513 0.00384209 0.00372947 0.592114 0.972481 0.785959 16226
## 16227 0.00349155 0.00329177 0.00364804 0.682877 0.782069 0.892366 16227
## 16228 0.00468438 0.00458936 0.00478475 0.859767 0.784092 0.972961 16228
## 16229 0.00535166 0.00537373 0.00535428 0.706416 0.819633 0.873451 16229
## 16230 0.00555363 0.00537467 0.00539254 0.840503 0.753410 0.770190 16230
## 16231 0.00531258 0.00519576 0.00516348 0.695610 0.790285 0.812897 16231
## piUK.piAU piUK.piUS
## 16226 -0.00006434 -0.00017696
## 16227 -0.00015649 0.00019978
## 16228 -0.00010037 0.00009502
## 16229 -0.00000262 -0.00002207
## 16230 0.00016109 0.00017896
## 16231 0.00014910 0.00011682
# Windows under the AU-vs-UK high-FST peak on chromosome 6:
# 16263 < SNP < 16281 (both bounds exclusive).
# Both bounds are applied to chrom6.div in one step. The earlier two-step
# version re-subset chrom6.div.small in its second step, which silently
# discarded the upper bound and kept every zoom window above 16263.
# NOTE: the mean() outputs recorded after this block were produced with that
# wider subset and need re-running to reflect the corrected window.
chrom6.div.hifst <- chrom6.div[which(chrom6.div$SNP > 16263 & chrom6.div$SNP < 16281),]
# AUUK high fst: window 5350001 to 6300001 on Chrom 6
# "SNP" 16263 to 16281
# Mean pi per population across the windows in chrom6.div.hifst.
mean(chrom6.div.hifst[["PI_UK"]])
## [1] 0.002873224
mean(chrom6.div.hifst[["PI_US"]])
## [1] 0.002896161
mean(chrom6.div.hifst[["PI_AU"]])
## [1] 0.00293987
# Wider chromosome 6 region: windows with 16155 < SNP < 16450.
chrom6.div.med <- subset(chrom6.div, SNP > 16155 & SNP < 16450)
# Zoomed two-panel figure for the chromosome 6 region in chrom6.div.small.
# Top panel: windowed FST for the three population pairs (left axis) with pi
# for each population drawn over it (right axis). Bottom panel: Tajima's D.
# Series are layered with par(new=TRUE); each axis() call annotates the
# coordinate system set up by the most recent plot().
# Colours: AU "#F2C14E", US "#2c81a8", UK "#39C855", US-vs-AU FST "grey50".
quartz(height=5,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
# --- top panel: FST layers, all on ylim c(-0.2, 0.4) ---
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
# --- top panel: pi layers, all on ylim c(0, 0.04) ---
par(new=TRUE)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=TRUE)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, col="#39C855", lwd=1)
# --- bottom panel: Tajima's D layers, all on ylim c(-2.4, 2.4) ---
par(mar=c(1,2,1,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=TRUE)
# axes=FALSE here as in the other layers; the axes are added explicitly just
# below, so leaving plot() to draw them as well would draw them twice.
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
# Single-panel FST figure over the wider chromosome 6 region
# (chrom6.div.med): the three pairwise FST series share ylim c(-0.01, 0.4)
# and are layered with par(new=TRUE).
# No par(new=TRUE) before the first plot(): the device is freshly opened, so
# there is nothing to overlay and R only warns
# ("calling par(new=TRUE) with no plot").
quartz(height=3,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.01,0.4))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.BroaderFSTaroundPeak.pdf", type="pdf")
## quartz_off_screen
## 2
# Chromosome 1: subset the windows, draw per-chromosome Manhattan plots, then
# pick the window range used for the zoomed figure.
chrom1.div <- div[which(div$CHROM==1),]
length(chrom1.div$SNP)
## [1] 2279
# The highlight vectors hold window ids from the whole genome. Restrict them
# to ids present in this subset so manhattan() is not asked to highlight
# windows it cannot find (which makes it warn).
manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelAUpi.hifstonly, chrom1.div$SNP),
          cex=1,cex.axis=1)
manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelUSpi.hifstonly, chrom1.div$SNP),
          cex=1,cex.axis=1)
# Zoom region: windows with 6400 < SNP < 6700 (both bounds exclusive).
chrom1.div.small <- chrom1.div[which(chrom1.div$SNP > 6400 & chrom1.div$SNP < 6700),]
# Zoomed two-panel figure for the chromosome 1 region in chrom1.div.small.
# Top panel: windowed FST for the three population pairs (left axis) with pi
# for each population drawn over it (right axis). Bottom panel: Tajima's D.
# Series are layered with par(new=TRUE); each axis() call annotates the
# coordinate system set up by the most recent plot().
# Colours: AU "#F2C14E", US "#2c81a8", UK "#39C855", US-vs-AU FST "grey50";
# the AU and US pi lines use lighter tints ("#F8DD9E", "#66A3C0").
quartz(height=5,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
# --- top panel: FST layers, all on ylim c(-0.2, 0.4) ---
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
# --- top panel: pi layers, all on ylim c(0, 0.03) ---
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, col="#39C855", lwd=1)
# --- bottom panel: Tajima's D layers, all on ylim c(-2.4, 3.4) ---
par(mar=c(1,2,1,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=TRUE)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
# Chromosome 1A (coded 1.25 in CHROM): subset the windows, draw
# per-chromosome Manhattan plots, then pick the window range used for the
# zoomed figure.
chrom1A.div <- div[which(div$CHROM==1.25),]
# 2896 to 4342
# 1446 windows, 3 ticks
# The highlight vectors hold window ids from the whole genome. Restrict them
# to ids present in this subset so manhattan() is not asked to highlight
# windows it cannot find (which makes it warn).
manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelAUpi.hifstonly, chrom1A.div$SNP),
          cex=1,cex.axis=1)
manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelUSpi.hifstonly, chrom1A.div$SNP),
          cex=1,cex.axis=1)
# Zoom region: windows with 3700 < SNP < 4000 (both bounds exclusive).
chrom1A.div.small <- chrom1A.div[which(chrom1A.div$SNP > 3700 & chrom1A.div$SNP < 4000),]
# Zoomed two-panel figure for the chromosome 1A region in chrom1A.div.small.
# Top panel: windowed FST for the three population pairs (left axis) with pi
# for each population drawn over it (right axis). Bottom panel: Tajima's D.
# Series are layered with par(new=TRUE); each axis() call annotates the
# coordinate system set up by the most recent plot().
# Colours: AU "#F2C14E", US "#2c81a8", UK "#39C855", US-vs-AU FST "grey50";
# the AU and US pi lines use lighter tints ("#F8DD9E", "#66A3C0").
quartz(height=5,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
# --- top panel: FST layers, all on ylim c(-0.2, 0.4) ---
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
# --- top panel: pi layers, all on ylim c(0, 0.03) ---
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, col="#39C855", lwd=1)
# --- bottom panel: Tajima's D layers, all on ylim c(-2.4, 3.4) ---
par(mar=c(1,2,1,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=TRUE)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
# Chromosome 4: subset the windows, draw per-chromosome Manhattan plots, then
# pick the window range used for the zoomed figure.
chrom4.div <- div[which(div$CHROM==4),]
# 13506 to 14923,
# 1417 windows, 7 ticks - peak ~500 windows from start
# The highlight vectors hold window ids from the whole genome. Restrict them
# to ids present in this subset so manhattan() is not asked to highlight
# windows it cannot find (which makes it warn).
manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelAUpi.hifstonly, chrom4.div$SNP),
          cex=1,cex.axis=1)
manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelUSpi.hifstonly, chrom4.div$SNP),
          cex=1,cex.axis=1)
# Zoom region: windows with 13850 < SNP < 14150 (both bounds exclusive).
chrom4.div.small <- chrom4.div[which(chrom4.div$SNP > 13850 & chrom4.div$SNP < 14150),]
# Zoomed two-panel figure for the chromosome 4 region in chrom4.div.small.
# Top panel: windowed FST for the three population pairs (left axis) with pi
# for each population drawn over it (right axis). Bottom panel: Tajima's D.
# Series are layered with par(new=TRUE); each axis() call annotates the
# coordinate system set up by the most recent plot().
# Colours: AU "#F2C14E", US "#2c81a8", UK "#39C855", US-vs-AU FST "grey50";
# the AU and US pi lines use lighter tints ("#F8DD9E", "#66A3C0").
quartz(height=5,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
# --- top panel: FST layers, all on ylim c(-0.2, 0.4) ---
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
# --- top panel: pi layers, all on ylim c(0, 0.03) ---
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, col="#39C855", lwd=1)
# --- bottom panel: Tajima's D layers, all on ylim c(-2.4, 3.4) ---
par(mar=c(1,2,1,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=TRUE)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
# Chromosome 4A (coded 4.5 in CHROM): subset the windows, draw
# per-chromosome Manhattan plots, then pick the window range used for the
# zoomed figure.
chrom4A.div <- div[which(div$CHROM==4.5),]
# 13097 to 13505
# 408 windows, 4 ticks
# The highlight vectors hold window ids from the whole genome. Restrict them
# to ids present in this subset so manhattan() is not asked to highlight
# windows it cannot find (which makes it warn).
manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelAUpi.hifstonly, chrom4A.div$SNP),
          cex=1,cex.axis=1)
manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=intersect(SNP.novelUSpi.hifstonly, chrom4A.div$SNP),
          cex=1,cex.axis=1)
# Zoom region: windows with 13150 < SNP < 13300 (both bounds exclusive).
chrom4A.div.small <- chrom4A.div[which(chrom4A.div$SNP > 13150 & chrom4A.div$SNP < 13300),]
# Zoomed two-panel figure for the chromosome 4A region in chrom4A.div.small.
# Top panel: windowed FST for the three population pairs (left axis) with pi
# for each population drawn over it (right axis). Bottom panel: Tajima's D.
# Series are layered with par(new=TRUE); each axis() call annotates the
# coordinate system set up by the most recent plot().
# Colours: AU "#F2C14E", US "#2c81a8", UK "#39C855", US-vs-AU FST "grey50";
# the AU and US pi lines use lighter tints ("#F8DD9E", "#66A3C0").
quartz(height=5,width=7) # macOS-only graphics device
options(scipen=999) # strongly penalise scientific notation in printed numbers
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
# --- top panel: FST layers, all on ylim c(-0.2, 0.4) ---
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
# 99th percentile of FST across all windows in div, per comparison
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
# --- top panel: pi layers, all on ylim c(0, 0.03) ---
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, col="#39C855", lwd=1)
# --- bottom panel: Tajima's D layers, all on ylim c(-2.4, 3.4) ---
par(mar=c(1,2,1,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=TRUE)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2